library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(nloptr)
library(reshape2)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(bnlearn)
##
## Attaching package: 'bnlearn'
## The following object is masked from 'package:stats':
##
## sigma
library(stringr)
df.tcr = read.table("../result/backbone.txt", header = T, sep = "\t") %>%
mutate(tcr_chain = as.factor(substr(as.character(tcr_v_allele), 1, 3)))
df.ag = read.table("../result/backbone_ag.txt", header = T, sep = "\t")
df.master = read.table("../result/structure.txt", header=T, sep="\t") %>%
filter(tcr_region %in% c("CDR1", "CDR2", "CDR3")) %>%
mutate(tcr_chain = as.factor(substr(as.character(tcr_v_allele), 1, 3))) %>%
droplevels
Modelling \(x_i = f(l, L)\) for \(C\alpha\) atoms. CDRs and antigen structures are transformed as follows:
First lets have a look at these functions for TCR
ggplot(df.tcr, aes(x = pos_tcr / (len_tcr - 1), y = x)) +
geom_line(aes(group = pdb_id, color = len_tcr), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(tcr_region~tcr_chain)
ggplot(df.tcr, aes(x = pos_tcr / (len_tcr - 1), y = y)) +
geom_line(aes(group = pdb_id, color = len_tcr), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(tcr_region~tcr_chain)
ggplot(df.tcr, aes(x = pos_tcr / (len_tcr - 1), y = z)) +
geom_line(aes(group = pdb_id, color = len_tcr), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(tcr_region~tcr_chain)
R direction
ggplot(df.tcr, aes(x = pos_tcr, y = z_cb - z)) +
geom_line(aes(group = interaction(pdb_id, tcr_chain)), alpha = 0.5) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_wrap(~len_tcr)
and antigen
ggplot(df.ag, aes(x = pos_ag / (len_ag - 1), y = x)) +
geom_line(aes(group = pdb_id, color = len_ag), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(.~mhc_type)
ggplot(df.ag, aes(x = pos_ag / (len_ag - 1), y = y)) +
geom_line(aes(group = pdb_id, color = len_ag), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(.~mhc_type)
ggplot(df.ag, aes(x = pos_ag / (len_ag - 1), y = z)) +
geom_line(aes(group = pdb_id, color = len_ag), alpha = 0.8) +
scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(11, 'Spectral')))(32)) +
facet_grid(.~mhc_type)
R direction
ggplot(df.ag, aes(x = pos_ag, y = z_cb - z)) +
geom_line(aes(group = pdb_id), alpha = 0.5) +
facet_wrap(~len_ag)
Functions to fit
calc_x = function(l, L, params) {
H = params[1] + params[2] * L
a = params[3]
b = params[4]
d = params[5] + params[6] * L # distance from C to F/W
x = l / (L - 1)
return(x * d + H * x ^ (a - 1) * (1 - x) ^ (b - 1) / beta(a, b))
}
param_ranges_x = data.frame(axis = "x",
x0 = c(10, 0.5, 1.5, 1.5, 5, 0.5),
lb = c(0, 0, 1.001, 1.001, 0, 0),
ub = c(20, 5, 10, 10, 50, 5))
calc_y = function(l, L, params) {
H = params[1] + params[2] * L
a1 = params[3]
b1 = params[4]
a2 = params[5]
b2 = params[6]
x = l / (L - 1)
return(H * (x ^ (a2 - 1) * (1 - x) ^ (b2 - 1) / beta(a2, b2) - x ^ (a1 - 1) * (1 - x) ^ (b1 - 1) / beta(a1, b1)))
}
param_ranges_y = data.frame(axis = "y",
x0 = c(10, 0.5, 2, 5, 5, 2),
lb = c(0, 0, 1.001, 1.001, 1.001, 1.001),
ub = c(20, 2, 10, 10, 10, 10))
calc_z = function(l, L, params) {
H = params[1] + params[2] * L
a = params[3]
b = params[4]
x = l / (L - 1)
return(H * x ^ (a - 1) * (1 - x) ^ (b - 1) / beta(a, b))
}
param_ranges_z = data.frame(axis = "z",
x0 = c(10, 0.5, 1.5, 1.5),
lb = c(0, 0, 1.001, 1.001),
ub = c(20, 2, 10, 10))
params_cdr = rbind(param_ranges_x, param_ranges_y, param_ranges_z)
calc_coord = function(l, L, params, axis) {
switch(axis,
x = calc_x(l, L, params),
y = calc_y(l, L, params),
z = calc_z(l, L, params))
}
Optimization goes here
params_fitted_tcr = data.frame()
for (t in list(c("CDR1", "CDR2"), "CDR3")) {
for (a in c("x", "y", "z")) {
print(paste("Fitting", paste(t), a))
# dplyr::select starting params and bounds
params_nloptr = params_cdr %>% filter(axis == a)
# prepare data
df.fit = df.tcr %>% filter(tcr_region %in% t)
df.fit$value = df.fit[,a]
df.fit$pos = df.fit$pos_tcr
df.fit$len = df.fit$len_tcr
# generic objective
obj = function(params, data) with(data, mean((value - calc_coord(pos, len, params, a))^2))
res = with(params_nloptr,
nloptr(x0 = x0, lb = lb, ub = ub,
eval_f = obj,
data = df.fit,
opts = list(algorithm = "NLOPT_LN_SBPLX", maxeval = 1e5)))
for (tt in t) {
solution = data.frame(tcr_region = tt,
axis = a,
params = res$solution)
params_fitted_tcr = rbind(params_fitted_tcr, solution)
}
print(res)
}
}
## [1] "Fitting CDR1 x" "Fitting CDR2 x"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 3033
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 3.06112324554689
## Optimal value of controls: 0.3111275 0 10 3.138592 4.203514 0.8356583
##
##
## [1] "Fitting CDR1 y" "Fitting CDR2 y"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 6524
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.735020819436763
## Optimal value of controls: 0 2 3.995074 3.347118 4.072782 3.340119
##
##
## [1] "Fitting CDR1 z" "Fitting CDR2 z"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 8483
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.862567735062751
## Optimal value of controls: 0 0.5090817 2.073567 2.124372
##
##
## [1] "Fitting CDR3 x"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 5017
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 4.20342129647816
## Optimal value of controls: 1.169303 0.1893118 2.172392 1.839203 5.88275 0
##
##
## [1] "Fitting CDR3 y"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 40185
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 2.0577584357609
## Optimal value of controls: 7.398926 0.2933037 3.519635 2.159706 4.353659 2.455754
##
##
## [1] "Fitting CDR3 z"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 941
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 1.30010418614403
## Optimal value of controls: 1.560094 0.5181191 2.392882 2.431665
Check solutions
calc_coord_tcr_fitted = function(l, L, tcr_region, axis) {
t = as.character(tcr_region[1])
a = as.character(axis[1])
params = (params_fitted_tcr %>% filter(tcr_region == t, axis == a))$params
calc_coord(l, L, params, a)
}
df.tcr.melt = melt(df.tcr, measure.vars = c("x", "y", "z"))
df.tcr.melt = df.tcr.melt %>%
group_by(tcr_region, variable) %>%
mutate(value_fit = calc_coord_tcr_fitted(pos_tcr, len_tcr, tcr_region, variable))
ggplot(df.tcr.melt %>% filter(variable == "x"), aes(x = pos_tcr / (len_tcr - 1))) +
geom_line(aes(group = interaction(pdb_id, tcr_chain, tcr_region), y = value), alpha = 0.8) +
geom_line(aes(group = tcr_region, y = value_fit), color = "red") +
ylab("x") +
facet_wrap(~len_tcr)
ggplot(df.tcr.melt %>% filter(variable == "y"), aes(x = pos_tcr / (len_tcr - 1))) +
geom_line(aes(group = interaction(pdb_id, tcr_chain, tcr_region), y = value), alpha = 0.8) +
geom_line(aes(group = tcr_region, y = value_fit), color = "red") +
ylab("y") +
facet_wrap(~len_tcr)
ggplot(df.tcr.melt %>% filter(variable == "z"), aes(x = pos_tcr / (len_tcr - 1))) +
geom_line(aes(group = interaction(pdb_id, tcr_chain, tcr_region), y = value), alpha = 0.8) +
geom_line(aes(group = tcr_region, y = value_fit), color = "red") +
ylab("z") +
facet_wrap(~len_tcr)
Optimization goes here
params_fitted_ag = data.frame()
for (m in c("MHCI", "MHCII")) {
for (a in c("x", "y", "z")) {
print(paste("Fitting", m, a))
# dplyr::select starting params and bounds
params_nloptr = params_cdr %>% filter(axis == a)
# prepare data
df.fit = df.ag %>% filter(mhc_type %in% m)
df.fit$value = df.fit[,a]
df.fit$pos = df.fit$pos_ag
df.fit$len = df.fit$len_ag
# generic objective
obj = function(params, data) with(data, mean((value - calc_coord(pos, len, params, a))^2))
res = with(params_nloptr,
nloptr(x0 = x0, lb = lb, ub = ub,
eval_f = obj,
data = df.fit,
opts = list(algorithm = "NLOPT_LN_SBPLX", maxeval = 1e5)))
solution = data.frame(mhc_type = m,
axis = a,
params = res$solution)
params_fitted_ag = rbind(params_fitted_ag, solution)
print(res)
}
}
## [1] "Fitting MHCI x"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1012
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 1.36767361381065
## Optimal value of controls: 0 0.03043346 2.107937 10 16.19432 0.6542352
##
##
## [1] "Fitting MHCI y"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 2567
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.667542606283011
## Optimal value of controls: 0.4680786 0 1.95011 1.001 10 1.435702
##
##
## [1] "Fitting MHCI z"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 4407
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 1.66946869482074
## Optimal value of controls: 0 0.3534113 2.992723 2.545585
##
##
## [1] "Fitting MHCII x"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 2884
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.764408744994204
## Optimal value of controls: 0 0.03467443 4.449276 2.066645 1.613794 2.626012
##
##
## [1] "Fitting MHCII y"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 3169
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 1.3347037919981
## Optimal value of controls: 0 0.009004148 3.204594 10 1.22578 1.064003
##
##
## [1] "Fitting MHCII z"
##
## Call:
##
## nloptr(x0 = x0, eval_f = obj, lb = lb, ub = ub, opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 2672
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 2.73296867021219
## Optimal value of controls: 0 0.227257 1.843548 1.766022
Check solutions
calc_coord_ag_fitted = function(l, L, mhc_type, axis) {
m = as.character(mhc_type[1])
a = as.character(axis[1])
params = (params_fitted_ag %>% filter(mhc_type == m, axis == a))$params
calc_coord(l, L, params, a)
}
df.ag.melt = melt(df.ag, measure.vars = c("x", "y", "z"))
df.ag.melt = df.ag.melt %>%
group_by(mhc_type, variable) %>%
mutate(value_fit = calc_coord_ag_fitted(pos_ag, len_ag, mhc_type, variable))
ggplot(df.ag.melt %>% filter(variable == "x"), aes(x = pos_ag / (len_ag - 1))) +
geom_line(aes(group = pdb_id, y = value), alpha = 0.8) +
geom_line(aes(group = mhc_type, y = value_fit), color = "red") +
ylab("x") +
facet_wrap(~len_ag)
ggplot(df.ag.melt %>% filter(variable == "y"), aes(x = pos_ag / (len_ag - 1))) +
geom_line(aes(group = pdb_id, y = value), alpha = 0.8) +
geom_line(aes(group = mhc_type, y = value_fit), color = "red") +
ylab("y") +
facet_wrap(~len_ag)
ggplot(df.ag.melt %>% filter(variable == "z"), aes(x = pos_ag / (len_ag - 1))) +
geom_line(aes(group = pdb_id, y = value), alpha = 0.8) +
geom_line(aes(group = mhc_type, y = value_fit), color = "red") +
ylab("z") +
facet_wrap(~len_ag)
Parameters of distance function specify separation across Z axis and yaw and pitch rotation
calc_dist = function(x_tcr, y_tcr, z_tcr, x_ag, y_ag, z_ag, x_tcr_max, x_ag_max, params) {
z_tcr = -z_tcr + params[1]
x_tcr = x_tcr - x_tcr_max / 2
x_ag = x_ag - x_ag_max / 2
cs1 = cos(params[2])
ss1 = sin(params[2])
cs2 = cos(params[3])
ss2 = sin(params[3])
x_tcr = x_tcr * cs1 - y_tcr * ss1
y_tcr = x_tcr * ss1 + y_tcr * cs1
x_tcr = x_tcr * cs2 - z_tcr * ss2
z_tcr = x_tcr * ss2 + z_tcr * cs2
return(sqrt((x_tcr - x_ag) ^ 2 + (y_tcr - y_ag) ^ 2 + (z_tcr - z_ag) ^ 2))
}
Optimization goes here
params_fitted_orientation = data.frame()
df.distest.summary = df.master %>%
group_by(pdb_id, tcr_chain, tcr_region, mhc_type) %>%
summarize(bad_entity = min(distance_CA) >= 30)
df.distest = merge(df.master, df.distest.summary) %>% filter(!bad_entity)
for (t in c("CDR1", "CDR2", "CDR3")) {
for (ch in c("TRA", "TRB")) {
for (m in c("MHCI", "MHCII")) {
print(paste("Fitting orientation for", t, ch, m))
df.fit = df.distest %>%
filter(tcr_region == t, tcr_chain == ch, mhc_type == m) %>%
dplyr::select(pdb_id, pos_tcr, len_tcr, pos_antigen, len_antigen, distance_CA)
df.fit$x_tcr = calc_coord_tcr_fitted(df.fit$pos_tcr, df.fit$len_tcr, t, "x")
df.fit$y_tcr = calc_coord_tcr_fitted(df.fit$pos_tcr, df.fit$len_tcr, t, "y")
df.fit$z_tcr = calc_coord_tcr_fitted(df.fit$pos_tcr, df.fit$len_tcr, t, "z")
df.fit$x_ag = calc_coord_ag_fitted(df.fit$pos_antigen, df.fit$len_antigen, m, "x")
df.fit$y_ag = calc_coord_ag_fitted(df.fit$pos_antigen, df.fit$len_antigen, m, "y")
df.fit$z_ag = calc_coord_ag_fitted(df.fit$pos_antigen, df.fit$len_antigen, m, "z")
df.fit = df.fit %>%
group_by(pdb_id) %>%
mutate(x_tcr_max = max(x_tcr), x_ag_max = max(x_ag))
obj = function(params, data) with(data, mean((1/distance_CA -
1/calc_dist(x_tcr, y_tcr, z_tcr,
x_ag, y_ag, z_ag,
x_tcr_max, x_ag_max,
params))^2))
res = with(params_nloptr,
nloptr(x0 = c(20, pi/2, 0), lb = c(0, 0, -pi/2), ub = c(100, 2*pi, pi/2),
eval_f = obj,
data = df.fit,
opts = list(algorithm = "NLOPT_LN_SBPLX", maxeval = 1e5)))
sola = res$solution
sola[2] = sola[2] / pi * 180
sola[3] = sola[3] / pi * 180
solution = data.frame(mhc_type = m,
tcr_chain = ch,
tcr_region = t,
params = res$solution,
params2 = sola,
param_name = c("height", "yaw", "pitch"))
params_fitted_orientation = rbind(params_fitted_orientation, solution)
print(res)
}
}
}
## [1] "Fitting orientation for CDR1 TRA MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 476
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000281677583924834
## Optimal value of controls: 25.83908 0 0.566778
##
##
## [1] "Fitting orientation for CDR1 TRA MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 652
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.00029200882916876
## Optimal value of controls: 28.22229 0 0.5941197
##
##
## [1] "Fitting orientation for CDR1 TRB MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 338
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.00030045425600131
## Optimal value of controls: 28.6697 3.189475 -0.5691854
##
##
## [1] "Fitting orientation for CDR1 TRB MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 418
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000306608745791826
## Optimal value of controls: 30.7937 2.358328 -0.5602491
##
##
## [1] "Fitting orientation for CDR2 TRA MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 359
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000160408768642331
## Optimal value of controls: 26.13596 2.009212 0.3564626
##
##
## [1] "Fitting orientation for CDR2 TRA MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 601
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 7.55625440907815e-05
## Optimal value of controls: 23.59515 4.34877 0.2376385
##
##
## [1] "Fitting orientation for CDR2 TRB MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1793
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000269644602436689
## Optimal value of controls: 24.00763 1.306104 -0.4031764
##
##
## [1] "Fitting orientation for CDR2 TRB MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 208
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000194616634150638
## Optimal value of controls: 25.94113 2.020967 -0.389075
##
##
## [1] "Fitting orientation for CDR3 TRA MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1457
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000381322969460991
## Optimal value of controls: 27.64579 4.302396 0.3061143
##
##
## [1] "Fitting orientation for CDR3 TRA MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1674
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000369182433703871
## Optimal value of controls: 32.49867 4.851156 0.4686846
##
##
## [1] "Fitting orientation for CDR3 TRB MHCI"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1154
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000440011128484609
## Optimal value of controls: 29.07562 0.8822835 -0.3010867
##
##
## [1] "Fitting orientation for CDR3 TRB MHCII"
##
## Call:
## nloptr(x0 = c(20, pi/2, 0), eval_f = obj, lb = c(0, 0, -pi/2),
## ub = c(100, 2 * pi, pi/2), opts = list(algorithm = "NLOPT_LN_SBPLX",
## maxeval = 1e+05), data = df.fit)
##
##
## Minimization using NLopt version 2.4.2
##
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because
## xtol_rel or xtol_abs (above) was reached. )
##
## Number of Iterations....: 1072
## Termination conditions: maxeval: 1e+05
## Number of inequality constraints: 0
## Number of equality constraints: 0
## Optimal value of objective function: 0.000237298581665191
## Optimal value of controls: 29.8334 1.793355 -0.3507075
Take a look at fitted values
params_fitted_orientation1 = dcast(params_fitted_orientation, mhc_type + tcr_chain + tcr_region ~ param_name, value.var = "params2")
params_fitted_orientation2 = params_fitted_orientation1
params_fitted_orientation2$yaw = params_fitted_orientation2$yaw + 180
params_fitted_orientation2$pitch = NA
params_fitted_orientation1 = rbind(params_fitted_orientation1, params_fitted_orientation2)
params_fitted_orientation1$yaw = ifelse(params_fitted_orientation1$yaw > 360,
params_fitted_orientation1$yaw - 360,
ifelse(params_fitted_orientation1$yaw < 0,
360 + params_fitted_orientation1$yaw,
params_fitted_orientation1$yaw))
params_fitted_orientation1$pitch = ifelse(params_fitted_orientation1$pitch > 360,
params_fitted_orientation1$pitch - 360,
ifelse(params_fitted_orientation1$pitch < 0,
360 + params_fitted_orientation1$pitch,
params_fitted_orientation1$pitch))
ggplot(params_fitted_orientation1, aes(x = yaw, y = 1, color = tcr_region)) +
geom_bar(stat="identity") +
geom_point() +
coord_polar(theta = "x") +
scale_x_continuous("yaw (rotation wrt Z axis)", breaks=seq(0, 360, by=30), expand=c(0,0), lim=c(0, 360)) +
facet_grid(mhc_type ~ tcr_chain)
ggplot(params_fitted_orientation1, aes(x = pitch, y = height, color = tcr_region)) +
geom_bar(stat="identity") +
geom_point() +
coord_polar(theta = "x") +
scale_x_continuous("pitch (rotation wrt Y axis)", breaks=seq(0, 360, by=30), expand=c(0,0), lim=c(0, 360)) +
facet_grid(mhc_type ~ tcr_chain)
## Warning: Removed 12 rows containing missing values (position_stack).
## Warning: Removed 12 rows containing missing values (geom_point).
General purpose function for computing distances
compute_distance_slave = function(pdb_id, pos_tcr, len_tcr, pos_antigen, len_antigen, tcr_chain, tcr_region, mhc_type) {
tc = as.character(tcr_chain[1])
tr = as.character(tcr_region[1])
mt = as.character(mhc_type[1])
x_tcr = calc_coord_tcr_fitted(pos_tcr, len_tcr, tr, "x")
y_tcr = calc_coord_tcr_fitted(pos_tcr, len_tcr, tr, "y")
z_tcr = calc_coord_tcr_fitted(pos_tcr, len_tcr, tr, "z")
x_ag = calc_coord_ag_fitted(pos_antigen, len_antigen, mt, "x")
y_ag = calc_coord_ag_fitted(pos_antigen, len_antigen, mt, "y")
z_ag = calc_coord_ag_fitted(pos_antigen, len_antigen, mt, "z")
max_x = data.frame(pdb_id, x_tcr, x_ag) %>%
group_by(pdb_id) %>%
mutate(x_tcr_max = max(x_tcr), x_ag_max = max(x_ag))
params = (params_fitted_orientation %>%
filter(tcr_chain == tc & tcr_region == tr & mhc_type == mt))$params
calc_dist(x_tcr, y_tcr, z_tcr, x_ag, y_ag, z_ag, max_x$x_tcr_max, max_x$x_ag_max, params)
}
compute_distances_master = function(data) {
data %>%
group_by(tcr_chain, tcr_region, mhc_type) %>%
mutate(distance_est = compute_distance_slave(pdb_id, pos_tcr, len_tcr, pos_antigen, len_antigen, tcr_chain, tcr_region, mhc_type))
}
df.distest = compute_distances_master(df.distest)
Now lets take a look at overall results
ggplot(df.distest, aes(x = as.integer(distance_CA), group = as.integer(distance_CA), y = distance_est)) +
geom_boxplot() +
facet_grid(mhc_type~tcr_chain+tcr_region) +
geom_abline(slope = 1, intercept = 0, color="red", linetype="dashed") +
scale_x_continuous(limits = c(0, 30)) +
scale_y_continuous(limits = c(0, 30)) +
theme_bw()
## Warning: Removed 4477 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
We will use the data frame with estimated distances from previous section. Contacts are defined as amino acid pairs that have interaction energy less than \(0\). A simple naive bayes network is used to fit the values.
E_NOISE_THRESHOLD = 0
df.bn.orig = df.distest %>%
mutate(contact = energy < E_NOISE_THRESHOLD, contact_f = as.factor(contact),
energy = ifelse(energy > 0, 0, energy),
aa_pair = as.factor(ifelse(as.character(aa_tcr) < as.character(aa_antigen),
paste(aa_tcr, aa_antigen, sep = "_"), paste(aa_antigen, aa_tcr, sep = "_"))))
df.bn = df.bn.orig %>%
filter(!is.na(contact)) %>%
droplevels
df.bn.fc = df.bn %>%
group_by(pdb_id) %>%
summarize(few_contacts = sum(contact) < 5) %>%
filter(few_contacts)
df.bn = df.bn %>%
filter(!(pdb_id %in% df.bn.fc$pdb_id))
Plot distance distribution for contacts and non-contacts:
ggplot(df.bn, aes(x = distance_CA, fill = contact)) +
geom_histogram(binwidth = 1) +
facet_grid(mhc_type~tcr_chain+tcr_region, scales="free_y") +
theme_bw()
Focus on TCR amino acid
ggplot(df.bn, aes(x = distance_est, fill = contact)) +
geom_histogram(binwidth = 1) +
facet_wrap(~aa_tcr, scales="free_y") +
theme_bw()
Focus on TCR amino acid, real energy values, color by antigen amino acid
ggplot(df.bn, aes(x = distance_est, y = energy, fill = aa_antigen)) +
geom_point(shape=21) +
facet_wrap(~aa_tcr) +
theme_bw() +
theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2, byrow=T))
ggplot(df.bn %>% filter(energy < 0 & aa_tcr == "R"), aes(x = distance_est, y = energy)) +
geom_point(shape=21) +
facet_wrap(~aa_antigen) +
theme_bw()
Distance discretization, all residues with \(d > 21\) automatically get \(0\) contact probability. Distances from \(5..21\) are put into \(2\) angstrom bins
ggplot(subset(df.bn, distance_est <= 21), aes(x = pmax(5, distance_est), fill = contact)) +
geom_histogram(binwidth = 2) +
facet_grid(contact~., scales = "free_y")
Prepare dataset once more
df.bn.fit = df.bn %>%
filter(distance_est <= 21)
df.bn.fit = as.data.frame(df.bn.fit)
df.bn.fit$distance_est_f = as.factor(as.integer(pmax(0, df.bn.fit$distance_est - 5) / 2) + 1)
df.bn.fit$odd_tcr = as.factor(df.bn.fit$pos_tcr %% 2 == 1)
df.bn.fit$odd_antigen = as.factor(df.bn.fit$pos_antigen %% 2 == 1)
levels(df.bn.fit$distance_est_f)
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
Our very simple model
emp_net = model2network(paste(
"[contact_f]",
"[aa_pair|contact_f]",
"[odd_tcr|contact_f]",
"[odd_antigen|contact_f]",
"[distance_est_f|odd_tcr:odd_antigen:contact_f]",
sep =""))
graphviz.plot(emp_net)
## Loading required namespace: Rgraphviz
Fit the model
df.bn.fit1 = df.bn.fit %>% dplyr::select(aa_pair,
distance_est_f,
odd_tcr,
odd_antigen,
contact_f)
fit = bn.fit(emp_net,
df.bn.fit1, method="bayes")
BIC(fit, df.bn.fit1)
## [1] -322586.5
Compute the probabilities and plot the ROC curve
res = predict(fit, node="contact_f", method="bayes-lw",
data=df.bn.fit1, prob=T)
p = attributes(res)$prob
rocobj = plot.roc(df.bn.fit1[,"contact_f"], p[2,], ci=T)
rocobj
##
## Call:
## plot.roc.default(x = df.bn.fit1[, "contact_f"], predictor = p[2, ], ci = T)
##
## Data: p[2, ] in 32111 controls (df.bn.fit1[, "contact_f"] FALSE) < 5170 cases (df.bn.fit1[, "contact_f"] TRUE).
## Area under the curve: 0.861
## 95% CI: 0.856-0.866 (DeLong)
By region and chain
df.bn.fit$p = p[2,]
df.bn.fit.sum = df.bn.fit %>%
group_by(pdb_id, tcr_chain, mhc_type, tcr_region) %>%
summarize(contacts = sum(contact), contacts_est = sum(p))
ggplot(df.bn.fit.sum, aes(x = contacts_est, y = contacts,color=tcr_chain,shape =mhc_type)) +
geom_point() + geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +
facet_wrap(~tcr_region,scales="free")
summary(lm(contacts~contacts_est, subset(df.bn.fit.sum, tcr_region == "CDR3")))
##
## Call:
## lm(formula = contacts ~ contacts_est, data = subset(df.bn.fit.sum,
## tcr_region == "CDR3"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.6066 -3.4295 0.4862 4.4030 13.2580
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.38980 1.17112 7.164 1.14e-11 ***
## contacts_est 0.49607 0.06758 7.340 3.99e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.49 on 222 degrees of freedom
## Multiple R-squared: 0.1953, Adjusted R-squared: 0.1917
## F-statistic: 53.88 on 1 and 222 DF, p-value: 3.989e-12
By chain
df.bn.fit.sum1 = df.bn.fit %>%
group_by(pdb_id, tcr_chain, mhc_type) %>%
summarize(contacts = sum(contact), contacts_est = sum(p))
ggplot(df.bn.fit.sum1, aes(x = contacts_est, y = contacts, color=tcr_chain, shape =mhc_type)) +
geom_point() + geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed")
summary(lm(contacts~contacts_est, df.bn.fit.sum1))
##
## Call:
## lm(formula = contacts ~ contacts_est, data = df.bn.fit.sum1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.9734 -3.6007 0.2963 5.0598 18.0041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.2019 1.5438 9.847 < 2e-16 ***
## contacts_est 0.3387 0.0627 5.402 1.69e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.576 on 222 degrees of freedom
## Multiple R-squared: 0.1162, Adjusted R-squared: 0.1122
## F-statistic: 29.18 on 1 and 222 DF, p-value: 1.693e-07
Overall
df.bn.fit.sum2 = df.bn.fit %>%
group_by(pdb_id, mhc_type) %>%
summarize(contacts = sum(contact), contacts_est = sum(p))
ggplot(df.bn.fit.sum2, aes(x = contacts_est, y = contacts, shape =mhc_type)) +
geom_point() + geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed")
summary(lm(contacts~contacts_est, df.bn.fit.sum2))
##
## Call:
## lm(formula = contacts ~ contacts_est, data = df.bn.fit.sum2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.344 -6.248 1.076 8.035 23.136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.98989 3.58490 8.923 1.14e-14 ***
## contacts_est 0.30460 0.07385 4.125 7.24e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.83 on 110 degrees of freedom
## Multiple R-squared: 0.1339, Adjusted R-squared: 0.1261
## F-statistic: 17.01 on 1 and 110 DF, p-value: 7.237e-05
TODO
Compute mean energies (for contacts only)
df.e = df.bn %>%
filter(contact) %>%
group_by(aa_pair) %>%
summarize(E = mean(energy))
df.e$aa_tcr = str_split_fixed(df.e$aa_pair, "_", 2)[, 1]
df.e$aa_antigen = str_split_fixed(df.e$aa_pair, "_", 2)[ ,2]
df.e2 = df.e
df.e2$aa_tcr = df.e$aa_antigen
df.e2$aa_antigen = df.e$aa_tcr
df.e = rbind(df.e, df.e2)
df.e2 = NULL
ggplot(df.e, aes(x=aa_tcr, y = aa_antigen, fill=E)) +
geom_tile() +
scale_fill_gradientn(colors=colorRampPalette(rev(brewer.pal(9, 'YlOrRd')))(32)) +
theme_bw()
Merge with BN predictions
df.final = merge(df.bn.fit, df.e, all.y = T)
df.final$E[is.na(df.final$E)] = 0
df.final = df.final %>%
mutate(pE = p * E)
Summarize energies
df.final.sum = df.final %>%
group_by(pdb_id, tcr_chain, mhc_type, tcr_region) %>%
summarize(E = sum(energy), E_est = sum(pE))
ggplot(df.final.sum, aes(x = E_est, y = E, color=tcr_chain,shape =mhc_type)) +
geom_point() + geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +
facet_wrap(~tcr_region,scales="free")
summary(lm(E~E_est, subset(df.final.sum, tcr_region == "CDR3")))
##
## Call:
## lm(formula = E ~ E_est, data = subset(df.final.sum, tcr_region ==
## "CDR3"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -141.22 -24.51 3.36 28.00 90.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -32.63521 5.93679 -5.497 1.06e-07 ***
## E_est 0.52684 0.08377 6.289 1.67e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.96 on 222 degrees of freedom
## Multiple R-squared: 0.1512, Adjusted R-squared: 0.1474
## F-statistic: 39.55 on 1 and 222 DF, p-value: 1.675e-09
Example complex
df.final.GLC = df.final %>%
filter(antigen_seq == "GLCTLVAML") %>%
droplevels()
ggplot(df.final.GLC, aes(x=pos_tcr, y=pos_antigen)) +
geom_tile(fill=NA) +
geom_label(aes(label=paste(aa_tcr, aa_antigen, sep=":"), fill = p), cex=1.3) +
geom_point(aes(color=contact)) +
scale_x_continuous(breaks=0:20) +
scale_y_continuous(breaks=0:20) +
scale_fill_gradient("P",
low="white", high="#045a8d") +
scale_color_manual(values = c(NA, "red")) +
facet_grid(tcr_chain ~ tcr_region, scales="free", space="free") +
theme_bw()
## Warning: Removed 241 rows containing missing values (geom_point).
ggplot(df.final.GLC, aes(x=pos_tcr, y=pos_antigen)) +
geom_tile(fill=NA) +
geom_label(aes(label=paste(aa_tcr, aa_antigen, sep=":"), fill = pE), cex=1.3) +
geom_point(aes(color=contact, size = -energy), shape = 21) +
scale_x_continuous(breaks=0:20) +
scale_y_continuous(breaks=0:20) +
scale_fill_gradient("E",
low="#045a8d", high="white") +
scale_color_manual(values = c(NA, "red")) +
facet_grid(tcr_chain ~ tcr_region, scales="free", space="free") +
theme_bw()
## Warning: Removed 241 rows containing missing values (geom_point).